In [1]:
from IPython.display import Image
Image(url='http://1.bp.blogspot.com/-ME24ePzpzIM/UQLWTwurfXI/AAAAAAAAANw/W3EETIroA80/s1600/drop_shadows_background.png',
      width=1000, height=1000)


Out[1]:

Classification

SVM


In [1]:
import numpy as np
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap



We create our dataset and its corresponding labels for training


In [2]:
# 100 points per class: class 1 is drawn from [0, 10)^2, class 2 from [10, 20)^2
xs = np.array([[np.random.randint(i-10, i), np.random.randint(i-10, i)] for i in [10, 20] for _ in range(100)])
y = [i for i in [1, 2] for _ in range(100)]
print(xs.shape, y)


(200, 2) [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]

We train ...


In [3]:
sv = SVC(kernel='linear')  # linear kernel; try kernel='rbf' too

In [4]:
sv.fit(xs, y)


Out[4]:
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

And we query the predicted class of a new sample ..


In [5]:
sv.predict([[17, 5]])  # predict expects a 2-D array: one row per sample


Out[5]:
array([2])

We obtain the decision function to visualize the resulting classification


In [6]:
XX_svm, YY_svm = np.mgrid[0:20:1000j, 0:20:1000j]   # 1000x1000 evaluation grid over [0, 20]^2
Z_svm = sv.decision_function(np.c_[XX_svm.ravel(), YY_svm.ravel()])
Z_svm = Z_svm.reshape(XX_svm.shape)
print(Z_svm)


[[ -9.5         -9.48998999  -9.47997998 ...,   0.47997998   0.48998999
    0.5       ]
 [ -9.48998999  -9.47997998  -9.46996997 ...,   0.48998999   0.5
    0.51001001]
 [ -9.47997998  -9.46996997  -9.45995996 ...,   0.5          0.51001001
    0.52002002]
 ..., 
 [  0.47997998   0.48998999   0.5        ...,  10.45995996  10.46996997
   10.47997998]
 [  0.48998999   0.5          0.51001001 ...,  10.46996997  10.47997998
   10.48998999]
 [  0.5          0.51001001   0.52002002 ...,  10.47997998  10.48998999
   10.5       ]]

In [7]:
plt.plot(xs[:100, 0], xs[:100, 1], 'ro')   # class 1
plt.plot(xs[100:, 0], xs[100:, 1], 'go')   # class 2

# solid line: decision boundary; dashed lines: contours at decision values -0.5 and 0.5
plt.contour(XX_svm, YY_svm, Z_svm, colors=['k', 'k', 'k'], linestyles=['--', '-', '--'], levels=[-.5, 0, .5])
plt.pcolormesh(XX_svm, YY_svm, Z_svm > 0, cmap=plt.cm.Paired)
plt.xlim(0, 20)
plt.ylim(0, 20)


Out[7]:
(0, 20)
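The solid contour is the decision boundary, which is determined entirely by the support vectors, the training points closest to it. A quick sketch to highlight them on the current plot, using the support_vectors_ attribute that SVC exposes after fitting:

print(sv.support_vectors_.shape)   # (n_support_vectors, 2)
plt.plot(sv.support_vectors_[:, 0], sv.support_vectors_[:, 1],
         'ko', markerfacecolor='none', markersize=12)   # circle the support vectors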

Now change the SVM kernel type and plot again. What changed?
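As a starting point, a minimal sketch that refits with the RBF kernel and redraws the decision function on the same grid; with this kernel the boundary is no longer a straight line:

sv_rbf = SVC(kernel='rbf')
sv_rbf.fit(xs, y)
Z_rbf = sv_rbf.decision_function(np.c_[XX_svm.ravel(), YY_svm.ravel()]).reshape(XX_svm.shape)
plt.plot(xs[:100, 0], xs[:100, 1], 'ro')
plt.plot(xs[100:, 0], xs[100:, 1], 'go')
plt.contour(XX_svm, YY_svm, Z_rbf, colors='k', linestyles=['--', '-', '--'], levels=[-.5, 0, .5])
plt.pcolormesh(XX_svm, YY_svm, Z_rbf > 0, cmap=plt.cm.Paired)
plt.xlim(0, 20)
plt.ylim(0, 20)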

K-neighbors


In [8]:
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid

In [9]:
kn = KNeighborsClassifier(n_neighbors=6, weights='uniform')  # each of the 6 nearest neighbors gets an equal vote

In [10]:
kn.fit(xs, y)


Out[10]:
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           n_neighbors=6, p=2, weights='uniform')
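With p=2 the Minkowski metric is plain Euclidean distance. As a side sketch, kneighbors (part of scikit-learn's nearest-neighbors API) shows which training points would vote on a new sample:

dist, idx = kn.kneighbors([[17, 5]], n_neighbors=6)
print(dist)                       # distances to the 6 nearest training points
print([y[i] for i in idx[0]])     # their labels; the majority decides the prediction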

In [11]:
XX, YY = np.mgrid[0:20:1000j, 0:20:1000j]
Z = kn.predict(np.c_[XX.ravel(), YY.ravel()])
Z = Z.reshape(XX.shape)

In [12]:
plt.plot(xs[:100, 0], xs[:100, 1], 'ro')   # class 1
plt.plot(xs[100:, 0], xs[100:, 1], 'go')   # class 2
plt.pcolormesh(XX, YY, Z, cmap=plt.cm.Paired)   # predicted class at every grid point
plt.xlim(0, 20)
plt.ylim(0, 20)


Out[12]:
(0, 20)
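Before moving on, a rough sketch of how n_neighbors and the weights scheme change the fit; training accuracy is only a crude proxy, so treat the numbers as illustrative:

for k in [1, 6, 25]:                      # small k overfits, large k over-smooths
    kn_k = KNeighborsClassifier(n_neighbors=k, weights='distance')
    kn_k.fit(xs, y)
    print(k, kn_k.score(xs, y))           # mean accuracy on the training set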

Clustering

K-Means


In [13]:
from sklearn.cluster import KMeans, MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs

In [14]:
km = KMeans(n_clusters=10, n_jobs=2)  # How many clusters do we want? Try changing n_clusters

In [15]:
km.fit(xs)  # KMeans is unsupervised; the labels y are not used


Out[15]:
KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=10, n_init=10,
    n_jobs=2, precompute_distances=True, random_state=None, tol=0.0001,
    verbose=0)

In [16]:
XX_km, YY_km = np.mgrid[0:20:1000j, 0:20:1000j]
Z_km = km.predict(np.c_[XX_km.ravel(), YY_km.ravel()])   # assign every grid point to its nearest centroid
Z_km = Z_km.reshape(XX_km.shape)
print(Z_km)


[[5 5 5 ..., 3 3 3]
 [5 5 5 ..., 3 3 3]
 [5 5 5 ..., 3 3 3]
 ..., 
 [0 0 0 ..., 6 6 6]
 [0 0 0 ..., 6 6 6]
 [0 0 0 ..., 6 6 6]]

In [17]:
plt.plot(xs[:100, 0],xs[:100, 1],'ro')
plt.plot(xs[100:, 0],xs[100:, 1],'go')
plt.pcolormesh(XX_km, YY_km, Z_km,  cmap=plt.cm.Paired)
plt.xlim(0, 20)
plt.ylim(0, 20)


Out[17]:
(0, 20)
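KMeans needs n_clusters fixed in advance. A common heuristic for choosing it is to compare the within-cluster sum of squares, exposed as inertia_ after fitting, across several values of k; a minimal sketch:

for k in range(1, 11):
    km_k = KMeans(n_clusters=k)
    km_k.fit(xs)
    print(k, km_k.inertia_)   # drops sharply until k matches the real structure (two blobs here)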

MeanShift


In [18]:
xs_circle, _ = make_blobs(n_samples=1000, centers=[[3,3], [2,2], [1,1]], cluster_std=0.4)  # three overlapping Gaussian blobs

In [19]:
bandwidth = estimate_bandwidth(xs_circle, quantile=0.2, n_samples=1000)
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)

In [20]:
ms.fit(xs_circle)


Out[20]:
MeanShift(bandwidth=0.70798162587847879, bin_seeding=True, cluster_all=True,
     min_bin_freq=1, seeds=None)

In [21]:
cluster_centers = ms.cluster_centers_   # one center per discovered cluster
labels = ms.labels_
print(labels.shape)
print(cluster_centers)


(1000,)
[[ 2.1063621   2.08495504]
 [ 0.99266406  0.9702425 ]
 [ 3.01717131  2.95103879]]

In [22]:
colors = ['ro', 'go', 'yo']
for point, label in zip(xs_circle, labels):          # color each sample by its cluster
    plt.plot(point[0], point[1], colors[label])
for cx, cy in cluster_centers:                       # mark the discovered centers
    plt.plot(cx, cy, 'bo', markeredgecolor='k', markersize=14)
plt.show()
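Unlike KMeans, MeanShift infers the number of clusters from the bandwidth, so the quantile passed to estimate_bandwidth is the knob that matters; a sketch of its effect (the values are illustrative):

for q in [0.1, 0.2, 0.3]:
    bw = estimate_bandwidth(xs_circle, quantile=q, n_samples=1000)
    ms_q = MeanShift(bandwidth=bw, bin_seeding=True).fit(xs_circle)
    print(q, bw, len(np.unique(ms_q.labels_)))   # smaller quantile -> smaller bandwidth -> more clusters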